*************************
**# A. Routineness IPUMS
*************************

***********************************
**## A.1 Importing IPUMS 1980 data
***********************************
* Load the 1980 cross-section, restricted to ages 22-65 with empstat == 1
use if year == 1980 & inrange(age, 22, 65) & empstat == 1 using "analysis/ipums_1970_1990.dta", clear

* Sum of person weights within each occupation-industry cell
collapse (sum) perwt, by(occ1990 ind1990)

* Occupation's share of its industry's (weighted) employment:
* cell weight divided by the industry-wide total of cell weights
egen ind_emp = total(perwt), by(ind1990)
gen ind_share = perwt/ind_emp

* Store industry-level weight totals separately, then return to the cell-level data
preserve
	collapse (sum) perwt, by(ind1990)
	save "processing/ind1990_emp", replace
restore

* Keep only the identifiers and the within-industry occupation share
keep occ1990 ind1990 ind_share
save "processing/occ1990_ind1990_weights", replace

	
**************************************
**## A.2 Concord occ1990dd to ind1990
**************************************
* The above is a concordance between IPUMS occ1990 and ind1990. Autor and Dorn
* construct routineness at the occ1990dd level, so we first need to concord from
* occ1990dd to IPUMS occ1990

* Concord from occ1990dd to IPUMS occ1990
* Keep only rows of the crosswalk that were matched when it was built.
* (_merge is written out in full so the script also runs with -set varabbrev off-.)
use "analysis/occ1990_occ1990dd_GLL.dta", clear
keep if _merge == 3
drop _merge
save "processing/occ1990_occ1990dd_GLL_temp.dta", replace

* Routineness data
* Attach the occ1990 codes to each occ1990dd routineness record; every
* observation on both sides is required to match.
use "raw/occ1990dd_alm.dta", clear
merge 1:m occ1990dd using "processing/occ1990_occ1990dd_GLL_temp"
assert _merge == 3
drop _merge

drop if occ1990 == .

* Unweighted mean of RTIa across the records that share an occ1990 code
collapse RTIa, by(occ1990)

* Concord to ind1990
merge 1:m occ1990 using "processing/occ1990_ind1990_weights"
	* _merge=1: 2 occ1990 codes (of 384 occ1990 codes) where we have occ1990 RTI data but no occ1990 IPUMS emp data
	* _merge=2: 1 occ1990 code with no occ1990 RTI data even though we have occ1990 IPUMS emp data (905 military)
keep if _merge == 3

* Rescale the occupation shares so they sum to one within each industry again
* after the unmatched occupation-industry cells were dropped
egen scale = total(ind_share), by(ind1990)
replace ind_share = ind_share/scale
drop scale _merge

* Industry routineness = share-weighted sum of occupation routineness
replace RTIa = RTIa*ind_share

collapse (sum) RTIa, by(ind1990)
save "processing/routine_ind1990", replace

***********************
**# C. Routineness SIC 
***********************
* ind 1990 data
* Combine industry routineness with the industry weight totals (perwt) that
* are used below as aggregation weights.
* (_merge and sic87 are written out in full throughout this section so the
* script does not depend on variable-name abbreviation.)
use "processing/routine_ind1990.dta", clear
merge 1:1 ind1990 using "processing/ind1990_emp.dta"
drop if ind1990==0
	* "N/A Not applicable"
assert _merge == 3
drop _merge

* Concord to ind1990ddx
merge 1:m ind1990 using "raw/cw_ind1990_ind1990ddx.dta"
assert _merge != 1
	* _merge=2: 9 ind1990ddx codes that are in the concordance but don't have RTI data
keep if _merge == 3
drop _merge

* Weight of each ind1990 row within its ind1990ddx code
egen totperwt = total(perwt), by(ind1990ddx)
gen share = perwt/totperwt

* ind1990ddx routineness = weighted sum of the ind1990 routineness values
replace RTIa = RTIa*share

collapse (sum) RTIa, by(ind1990ddx)

* Concord to SIC87
* mmerge is a user-written command; uif() restricts the using data to
* observations with sic87 between 2000 and 4000 (inclusive) before merging.
mmerge ind1990ddx using "raw/cw_sic87_ind1990ddx", type(1:n) uif(inrange(sic87,2000,4000))
assert _merge != 2
	* _merge=1: 145 ind1990ddx codes in RTI data but do not reach sic *manuf* code via concordance
keep if _merge == 3
drop _merge

* NOTE(review): -gen- uses the session's default storage type; if RTIa is
* stored as double this rounds rti to float -- confirm this is intended.
gen rti = RTIa
keep sic87 rti
rename sic87 sic
label var rti "rti 1980, sic"
save "processing/sic_routineness.dta", replace
